import jieba

# Word-frequency count: segment the text file with jieba, merge the
# aliases of the main characters into one canonical name each, drop a
# fixed set of filler words, and print (word, count) pairs sorted by
# count in descending order.

# The file is read as raw bytes and handed to jieba as-is, so any
# decoding is left to jieba.
with open("西游记.txt", "rb") as f:
    s = f.read()

# List of segmented tokens.
list1 = jieba.lcut(s)

# Alias -> canonical name. Tokens not listed here are counted under
# their own text.
aliases = {
    "行者": '悟空',
    "大圣": '悟空',
    '老孙': '悟空',
    "师父": '唐僧',
    '三藏': '唐僧',
    '长老': '唐僧',
    '悟净': '沙和尚',
}

# Occurrence counts: word is the key, number of occurrences the value.
counts = {}
for word in list1:
    # Skip single-character tokens.
    if len(word) == 1:
        continue
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

# Filler words to leave out of the result.
excludes = {"一个", "那里", "怎么", "我们", "不知", "两个", "甚么", "只见", "不是", "原来", "不敢", "闻言", "如何", "什么"}
for word in excludes:
    # pop() with a default instead of `del`: an excluded word that never
    # occurs in the text must not abort the script with a KeyError.
    counts.pop(word, None)

items = list(counts.items())
# Most frequent first; list.sort is stable, so ties keep insertion order.
items.sort(key=lambda x: x[1], reverse=True)
print(items)